# 获得页面源代码
# re匹配需要的内容
from fileinput import close
from idlelib.iomenu import encoding

import requests
import re
import csv

# Target list page. The query-string form below appears to address other
# pages of the same list (presumably `start` is the entry offset -- confirm):
# https://movie.douban.com/top250?start=100&filter=
url = "https://movie.douban.com/top250"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36 Edg/130.0.0.0'
}

# Fetch the page source. The timeout keeps the script from hanging forever on
# a stalled connection, and the `with` block closes the response even when
# reading the body raises.
with requests.get(url, headers=headers, timeout=10) as resp:
    # Fail loudly on an HTTP error status instead of silently producing an
    # empty CSV from an error page.
    resp.raise_for_status()
    page_data = resp.text

# Parse the data. Named groups: `name` (first title span of an item), `year`
# (text between the <br> and the following &nbsp), `score` (rating_num span)
# and `num` (text before "人评价"). re.S lets `.` match across newlines,
# because each item's markup spans several lines.
obj = re.compile(r'<li>.*?div class="item">.*?<span class="title">(?P<name>.*?)</span>'
                 r'.*?<p class="">.*?<br>(?P<year>.*?)&nbsp.*?<span class="rating_num" property="v:average">'
                 r'(?P<score>.*?)</span>.*?<span>(?P<num>.*?)人评价</span>', re.S)

# Match the data and write one CSV row per match. The `with` block guarantees
# the file is flushed and closed even if an iteration raises.
result = obj.finditer(page_data)
with open("data.csv", mode="w", encoding="utf-8", newline='') as f:
    csvwriter = csv.writer(f)
    for it in result:
        dic = it.groupdict()
        # The captured year is surrounded by whitespace in the page markup;
        # strip it once and reuse it for both the console and the CSV row.
        dic['year'] = dic['year'].strip()
        print("电影名：", dic['name'], "拍摄年份：", dic['year'], "评分：", dic['score'],
              dic['num'], "人给出评分")
        # Column order follows the group order in the pattern:
        # name, year, score, num.
        csvwriter.writerow(dic.values())